import pandas as pd
import seaborn as sns
import plotly.express as px
import matplotlib.pyplot as plt
from matplotlib.pyplot import figure
import matplotlib.dates as mdates
import plotly.io as pio
pio.renderers.default = "plotly_mimetype+notebook"
For this exercise, we have written the following code to load the stock dataset built into plotly express.
# Load the stock dataset bundled with plotly express.
stocks = px.data.stocks()
# Preview the first rows to see which columns are available.
stocks.head()
| date | GOOG | AAPL | AMZN | FB | NFLX | MSFT | |
|---|---|---|---|---|---|---|---|
| 0 | 2018-01-01 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 |
| 1 | 2018-01-08 | 1.018172 | 1.011943 | 1.061881 | 0.959968 | 1.053526 | 1.015988 |
| 2 | 2018-01-15 | 1.032008 | 1.019771 | 1.053240 | 0.970243 | 1.049860 | 1.020524 |
| 3 | 2018-01-22 | 1.066783 | 0.980057 | 1.140676 | 1.016858 | 1.307681 | 1.066561 |
| 4 | 2018-01-29 | 1.008773 | 0.917143 | 1.163374 | 1.018357 | 1.273537 | 1.040708 |
Select a stock and create a suitable plot for it. Make sure the plot is readable with relevant information, such as date, values.
# YOUR CODE HERE
# Set the default figure size BEFORE plotting: rcParams are only read when a
# figure is created, so assigning the size after stocks.plot() has no effect
# on the figure that was just drawn.
plt.rcParams["figure.figsize"] = (15, 10)

# Line chart of the AAPL column against the date column.
plot = stocks.plot(x='date', y='AAPL')
plot.set_ylabel('stock value')
plot.set_title('AAPL')
plt.show()
You've already plotted data from one stock. It is also possible to plot several stocks together to support comparison.
To distinguish the different lines, customise line styles, markers, and colors, and include a legend in the plot.
# YOUR CODE HERE
# Set the default figure size BEFORE plotting: rcParams are only read when a
# figure is created, so assigning the size afterwards does not resize it.
plt.rcParams["figure.figsize"] = (15, 10)

tickers = ['AAPL', 'GOOG', 'AMZN', 'FB', 'NFLX', 'MSFT']
# One matplotlib format string (marker + line style) per ticker, in the same
# order as `tickers`, so the lines stay distinguishable without colour.
line_formats = ['o-', 's--', '^-.', 'D:', 'v-', 'x--']

plot = stocks.plot(
    x='date',
    y=tickers,
    style=line_formats,
    colormap='tab10',   # qualitative palette: one distinct colour per line
    markevery=8,        # draw a marker every 8th point to avoid clutter
    linewidth=1.5,
)
plot.set_ylabel('stock value')
plot.set_title('Stocks')
plot.legend(title='stock')
plt.show()
First, load the tips dataset
# Load the 'tips' example dataset through seaborn.
tips = sns.load_dataset('tips')
# Preview the first rows to see which columns are available.
tips.head()
| total_bill | tip | sex | smoker | day | time | size | |
|---|---|---|---|---|---|---|---|
| 0 | 16.99 | 1.01 | Female | No | Sun | Dinner | 2 |
| 1 | 10.34 | 1.66 | Male | No | Sun | Dinner | 3 |
| 2 | 21.01 | 3.50 | Male | No | Sun | Dinner | 3 |
| 3 | 23.68 | 3.31 | Male | No | Sun | Dinner | 2 |
| 4 | 24.59 | 3.61 | Female | No | Sun | Dinner | 4 |
Let's explore this dataset. Pose a question and create a plot that helps you answer it.
Some possible questions:
# YOUR CODE HERE
# State the question the figure is meant to answer.
question = 'On which day(s) employees receive the most tip and is this correlated to the type of dinner on that day?'
print(question)

# One panel per day; points are coloured by the 'time' column.
plot = sns.FacetGrid(data=tips, col='day', hue='time')
plot.map(sns.scatterplot, 'total_bill', 'tip')
plot.add_legend()

plt.show()
On which day(s) employees receive the most tip and is this correlated to the type of dinner on that day?
Redo the above exercises (challenges 2 & 3) with plotly express. Create diagrams which you can interact with.
Hints:
# YOUR CODE HERE
# Interactive line chart: one trace per ticker, all plotted against the date.
df = px.data.stocks()
tickers = ['AAPL', 'GOOG', 'AMZN', 'FB', 'NFLX', 'MSFT']
fig = px.line(data_frame=df, x="date", y=tickers)
fig.show()
# YOUR CODE HERE
# Interactive version of the faceted tips scatter plot.
df = px.data.tips()
question = 'On which day(s) employees receive the most tip and is this correlated to the type of dinner on that day?'
print(question)

# One facet column per day; marker colour encodes the 'time' column.
fig = px.scatter(
    data_frame=df,
    x="total_bill",
    y="tip",
    facet_col="day",
    color="time",
)
fig.show()
On which day(s) employees receive the most tip and is this correlated to the type of dinner on that day?
Recreate the barplot below that shows the population of different continents for the year 2007.
Hints:
# Load the gapminder dataset bundled with plotly express.
df = px.data.gapminder()
# Preview the first rows to see which columns are available.
df.head()
| country | continent | year | lifeExp | pop | gdpPercap | iso_alpha | iso_num | |
|---|---|---|---|---|---|---|---|---|
| 0 | Afghanistan | Asia | 1952 | 28.801 | 8425333 | 779.445314 | AFG | 4 |
| 1 | Afghanistan | Asia | 1957 | 30.332 | 9240934 | 820.853030 | AFG | 4 |
| 2 | Afghanistan | Asia | 1962 | 31.997 | 10267083 | 853.100710 | AFG | 4 |
| 3 | Afghanistan | Asia | 1967 | 34.020 | 11537966 | 836.197138 | AFG | 4 |
| 4 | Afghanistan | Asia | 1972 | 36.088 | 13079460 | 739.981106 | AFG | 4 |
# YOUR CODE HERE
# Boolean mask that is True for the rows recorded in 2007.
year_2007 = df['year'].eq(2007)
# Display only the 2007 rows.
df.loc[year_2007]
| country | continent | year | lifeExp | pop | gdpPercap | iso_alpha | iso_num | |
|---|---|---|---|---|---|---|---|---|
| 11 | Afghanistan | Asia | 2007 | 43.828 | 31889923 | 974.580338 | AFG | 4 |
| 23 | Albania | Europe | 2007 | 76.423 | 3600523 | 5937.029526 | ALB | 8 |
| 35 | Algeria | Africa | 2007 | 72.301 | 33333216 | 6223.367465 | DZA | 12 |
| 47 | Angola | Africa | 2007 | 42.731 | 12420476 | 4797.231267 | AGO | 24 |
| 59 | Argentina | Americas | 2007 | 75.320 | 40301927 | 12779.379640 | ARG | 32 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 1655 | Vietnam | Asia | 2007 | 74.249 | 85262356 | 2441.576404 | VNM | 704 |
| 1667 | West Bank and Gaza | Asia | 2007 | 73.422 | 4018332 | 3025.349798 | PSE | 275 |
| 1679 | Yemen, Rep. | Asia | 2007 | 62.698 | 22211743 | 2280.769906 | YEM | 887 |
| 1691 | Zambia | Africa | 2007 | 42.384 | 11746035 | 1271.211593 | ZMB | 894 |
| 1703 | Zimbabwe | Africa | 2007 | 43.487 | 12311143 | 469.709298 | ZWE | 716 |
142 rows × 8 columns
# Keep only the 2007 observations.
df_data_2007 = df.query('year == 2007')

# Total population per continent. Only 'pop' is aggregated: summing the whole
# frame also adds up columns whose totals are meaningless (year, lifeExp,
# gdpPercap, iso_num) and, depending on the pandas version, either warns
# about or concatenates the string columns (country, iso_alpha).
df_data_2007_new = df_data_2007.groupby('continent')[['pop']].sum()

# Horizontal bar chart: one bar per continent (the frame's index), coloured by
# continent and labelled with the population in SI-prefixed form.
fig = px.bar(
    df_data_2007_new,
    x='pop',
    y=df_data_2007_new.index,
    orientation='h',
    color=df_data_2007_new.index,
    text_auto='.4s',
)
# Order the bars by total population, largest at the top.
fig.update_yaxes(categoryorder='total ascending')
fig.show()